
rm(list = ls())
## llamando a las librerias que (creemos que) vamos a usar
options("install.lock"=FALSE)
if (!require ("pacman")) install.packages ("pacman")

pacman::p_load (dplyr
                , haven
                , ggplot2
                , ggpubr
                , ggthemes  ## si quieren mas themes
                , readstata13
                , readxl
                , sf
                , tidyverse
                , tidyr
                , units
                , viridis ## paleta de colores Viridis
                , wesanderson## p/usar paleta de colores de Wes Anderson
                , stringr
                , RColorBrewer
                , patchwork
                , Rmisc
                , lfe
                , stargazer
                , AER
                , haven
                , skimr
                , modelsummary
                , terra
                , fixest
                , vtable
                , did
                , cowplot
                , grid
                , didimputation)

setwd("/Users/mateovasquez/Dropbox/")

#####

base <- read.dta13("master_violence_landtitle_caracteristicas.dta")

#//3. Diff-in-Diff (Benchmark Specification) - Disaggregated 

#Outcome Variable
names(base)

base <- base %>% 
  dplyr::mutate(total_violence = rowSums(across(c(sumattacks_public, sumattacks_guerrilla,sumattacks_paramilitary)), na.rm = TRUE),
                health_inst = rowSums(across(c(health_post, health_center,hospital)), na.rm = TRUE))


#Renaming Variables 

base <- base %>% 
  dplyr::rename("indian_title_area_cs" = "area_resguardos_indigenas_cs",
                "indian_title_area" = "area_resguardos_indigenas",
                "afro_title_area_cs" = "area_titul_comunidades_negras_cs",
                "afro_title_area" = "area_titul_comunidades_negras")

#Treatment 	

base <- base %>% 
  dplyr::mutate(post = ifelse(Año>1995,1,0),
                any_titleXpost = any_title*post,
                any_title2 = any_title,
                post2 = post)


#the mean level of violence in neighboring municipalities (1988–1995)
Neighbors <- base %>% 
  dplyr::filter(Año<=1995) %>% 
  dplyr::group_by(codmpio) %>% 
  dplyr::summarise(spatlagguerrsum100_100_1995 = mean(spatlagguerrsum100_100, na.rm = T))

summary(Neighbors$spatlagguerrsum100_100_1995)

base <- left_join(base,Neighbors,
                  by = "codmpio")

#Controls X post
base <- base %>%
  dplyr::mutate(
    altura_post = altura*post, #ELEVATION
    rainfall_post = rainfall*post, #RAINFALL
    coca_0_1_post = coca_0_1*post, # COCA
    presence_mines1560_post = presence_mines1560*post, #GOLD MINES
    num_encomiendas1560_post = num_encomiendas1560*post, #ENCOMIENDAS
    royalroaddist_post = royalroaddist*post, #distance to royal roads
    distancia_mercado_post = distancia_mercado*post, #nearest market towns (km)
    pop95_post = pop95*post, #municipal population in 1995 
    pct_minority85_post = pct_minority85*post, #share of minority population in 1985 (%)
    conflictos_1901_1917_post = conflictos_1901_1917*post,#historical conflict using data on the inci- dence of land conflict between 1901 and 1931
    conflictos_1918_1931_post = conflictos_1918_1931*post,#historical conflict using data on the inci- dence of land conflict between 1901 and 1931
    Violencia_48_a_53_post = Violencia_48_a_53*post, #presence of armed combat during La Violencia (1948–1953)
    anucraids1971_78_post = anucraids1971_78*post, #frequency of land invasions between 1971 and 1978
    spatlagguerrsum100_100_1995_post = spatlagguerrsum100_100_1995*post, #the mean level of violence in neighboring municipalities (1988–1995)
    ltotal_plots_rfmd_1960_85_post = ltotal_plots_rfmd_1960_85*post, # total number of plots reformed between 1960 and 1985 to proxy for rural grievances.
    totviolencia85_post = totviolencia85*post) #ojo: La Violencia (1948 - 1958)

# Applying asinh transformation to each specified variable directly in mutate

base <- base %>%
  mutate(
    as_total_violence = asinh(total_violence),
    as_sumattacks_public = asinh(sumattacks_public),
    as_sumattacks_paramilitary = asinh(sumattacks_paramilitary),
    as_sumattacks_guerrilla = asinh(sumattacks_guerrilla))

# Running the regression with felm, including fixed effects and clustering

base <- base %>%
  mutate(Depto_year = paste(coddepto, Año, sep = "_"))
#####

base <- base %>% 
  dplyr::group_by(codmpio,Año) %>% 
  dplyr::mutate(first_title = sum(landtitle == 1)) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(codmpio) %>% 
  dplyr::mutate(Validation_year = ifelse(first_title == 1, Año,NA),
                first_title_year = min(Validation_year, na.rm = TRUE)) %>% 
  dplyr::ungroup()

base$first_title_year[base$first_title_year == Inf] <- 0

view(base %>% 
       dplyr::select(Año, codmpio, landtitle, first_title, Validation_year,first_title_year))

base <- base %>% 
  dplyr::mutate(time_to_event = Año - first_title_year,
                time_to_event = ifelse(first_title_year ==0 ,0,time_to_event))

base_callaway <- base %>% 
  dplyr::filter(gpacifica ==1) %>% 
  dplyr::mutate(time_to_event = Año - first_title_year,
                time_to_event = ifelse(first_title_year ==0 ,0,time_to_event))

out <- att_gt(yname = "as_sumattacks_paramilitary",
              gname = "first_title_year",
              idname = "codmpio",
              tname = "Año",
              clustervars = "codmpio", 
              control_group = c("notyettreated"),
              xformla = ~1,
              data = base_callaway,
              est_method = "reg")

es <- aggte(out, type = "dynamic", na.rm = T)

# Extract event-study data from 'es'
event_study <- data.frame(
  time = es$egt,                  # Event time periods
  att = es$att.egt,              # Average Treatment Effects (ATT)
  se = es$se.egt                 # Standard errors
)

# Add confidence intervals
event_study <- event_study %>%
  mutate(
    LB = att - 1.96 * se,         # Lower bound of 95% CI
    UB = att + 1.96 * se,         # Upper bound of 95% CI
    treatment_period = time >= 0  # Pre/Post treatment indicator
  )

# Creating the plot
callaway_plot <- ggplot(event_study, aes(x = time, y = att)) +
  # Pre-treatment confidence intervals as bars
  geom_errorbar(data = subset(event_study, treatment_period == FALSE), 
                aes(ymin = LB, ymax = UB), 
                color = "blue", width = 0.4, alpha = 0.6) +
  # Post-treatment confidence intervals as bars
  geom_errorbar(data = subset(event_study, treatment_period == TRUE), 
                aes(ymin = LB, ymax = UB), 
                color = "red", width = 0.4, alpha = 0.6) +
  # Pre-treatment points
  geom_point(data = subset(event_study, treatment_period == FALSE), 
             size = 2, color = "blue") +
  # Post-treatment points
  geom_point(data = subset(event_study, treatment_period == TRUE), 
             size = 2, color = "red") +
  # Horizontal reference line at 0
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  # Customizing axes and labels
  scale_x_continuous(name = "Periods to Treatment", breaks = seq(-30, 20, by = 10)) +
  scale_y_continuous(name = "ATT", limits = c(-2, 4), breaks = seq(-1, 3, by = 1)) +
  # Adding title and legend
  ggtitle("Event-Study (Callaway Sant'Anna)") +
  scale_color_manual(
    values = c("Pre-treatment" = "blue", "Post-treatment" = "red"), 
    name = NULL
  ) +
  # Styling to match Stata
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.background = element_rect(fill = "gray95"),
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Print the plot
print(callaway_plot)
